xend: Balloon down memory to achieve enough DMA32 memory for PV guests
authorKeir Fraser <keir.fraser@citrix.com>
Sat, 14 Nov 2009 08:09:50 +0000 (08:09 +0000)
committerKeir Fraser <keir.fraser@citrix.com>
Sat, 14 Nov 2009 08:09:50 +0000 (08:09 +0000)
with PCI pass-through to successfully launch.

If the user hasn't used the dom0_mem= bootup parameter, the privileged
domain usurps all of the memory. During launch of PV guests with PCI
pass-through we ratchet down the memory for the privileged domain to
the required memory for the PV guest. However, for PV guests with PCI
pass-through we do not take into account that the PV guest is going to
swap its SWIOTLB memory for DMA32 memory - in fact, swap 64MB of
it. This patch balloons down the privileged domain so that there are
64MB of DMA32 memory available.

From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Signed-off-by: Keir Fraser <keir.fraser@citrix.com>
tools/python/xen/lowlevel/xc/xc.c
tools/python/xen/xend/XendConfig.py
tools/python/xen/xend/XendDomainInfo.py
tools/python/xen/xend/XendNode.py

index 12ea007a534f92f5238adeab58274b7b51836965..4c90579c681a81314b1f9016c7877bf653c6e872 100644 (file)
@@ -1059,6 +1059,7 @@ static PyObject *pyxc_physinfo(XcObject *self)
     int i, j, max_cpu_id;
     uint64_t free_heap;
     PyObject *ret_obj, *node_to_cpu_obj, *node_to_memory_obj;
+    PyObject *node_to_dma32_mem_obj;
     xc_cpu_to_node_t map[MAX_CPU_ID + 1];
     const char *virtcap_names[] = { "hvm", "hvm_directio" };
 
@@ -1128,10 +1129,27 @@ static PyObject *pyxc_physinfo(XcObject *self)
         Py_DECREF(pyint);
     }
 
+    xc_dom_loginit();
+    /* DMA memory. */
+    node_to_dma32_mem_obj = PyList_New(0);
+
+    for ( i = 0; i < info.nr_nodes; i++ )
+    {
+        PyObject *pyint;
+
+        xc_availheap(self->xc_handle, 0, 32, i, &free_heap);
+        xc_dom_printf("Node:%d: DMA32:%ld\n", i, free_heap);
+        pyint = PyInt_FromLong(free_heap / 1024);
+        PyList_Append(node_to_dma32_mem_obj, pyint);
+        Py_DECREF(pyint);
+    }
+
     PyDict_SetItemString(ret_obj, "node_to_cpu", node_to_cpu_obj);
     Py_DECREF(node_to_cpu_obj);
     PyDict_SetItemString(ret_obj, "node_to_memory", node_to_memory_obj);
     Py_DECREF(node_to_memory_obj);
+    PyDict_SetItemString(ret_obj, "node_to_dma32_mem", node_to_dma32_mem_obj);
+    Py_DECREF(node_to_dma32_mem_obj);
  
     return ret_obj;
 #undef MAX_CPU_ID
index 6a168a264d144fdfc74da0ca91e8282678d8613d..0eadf343d3951aeff3e7ee60b87317d76a997204 100644 (file)
@@ -2111,6 +2111,13 @@ class XendConfig(dict):
     def is_hap(self):
         return self['platform'].get('hap', 0)
 
+    def is_pv_and_has_pci(self):
+        for dev_type, dev_info in self.all_devices_sxpr():
+            if dev_type != 'pci':
+                continue
+            return not self.is_hvm()
+        return False
+
     def update_platform_pci(self):
         pci = []
         for dev_type, dev_info in self.all_devices_sxpr():
index 212d1d3927e6f8e5adcbe0b944f0f1736c8e3df2..f6becb6bcceb9f0cb08fae5967fea2fa900c09fd 100644 (file)
@@ -2580,7 +2580,8 @@ class XendDomainInfo:
 
 
     def _setCPUAffinity(self):
-        """ Repin domain vcpus if a restricted cpus list is provided
+        """ Repin domain vcpus if a restricted cpus list is provided.
+            Returns the chosen node number.
         """
 
         def has_cpus():
@@ -2597,6 +2598,7 @@ class XendDomainInfo:
                         return True
             return False
 
+        index = 0
         if has_cpumap():
             for v in range(0, self.info['VCPUs_max']):
                 if self.info['vcpus_params'].has_key('cpumap%i' % v):
@@ -2647,6 +2649,54 @@ class XendDomainInfo:
                 cpumask = info['node_to_cpu'][index]
                 for v in range(0, self.info['VCPUs_max']):
                     xc.vcpu_setaffinity(self.domid, v, cpumask)
+        return index
+
+    def _freeDMAmemory(self, node):
+
+       # If we are PV and have PCI devices the guest will
+       # turn on a SWIOTLB. The SWIOTLB _MUST_ be located in the DMA32
+       # zone (under 4GB). To do so, we need to balloon down Dom0 to where
+       # there is enough (64MB) memory under the 4GB mark. This ballooning
+       # might take more memory out than just 64MB though :-(
+       if not self.info.is_pv_and_has_pci():
+               return
+
+       retries = 2000
+       ask_for_mem = 0;
+       need_mem = 0
+       try:            
+           while (retries > 0):
+               physinfo = xc.physinfo()
+               free_mem = physinfo['free_memory']
+               nr_nodes = physinfo['nr_nodes']
+               node_to_dma32_mem = physinfo['node_to_dma32_mem']
+               if (node > nr_nodes):
+                    return;
+               # Extra 2MB above 64MB seems to do the trick.
+               need_mem = 64 * 1024 + 2048 - node_to_dma32_mem[node]
+               # our starting point. We ask just for the difference to
+               # have an extra 64MB under 4GB.
+               ask_for_mem = max(need_mem, ask_for_mem);
+               if (need_mem > 0):
+                    log.debug('_freeDMAmemory (%d) Need %dKiB DMA memory. '
+                              'Asking for %dKiB', retries, need_mem,
+                              ask_for_mem)
+
+                    balloon.free(ask_for_mem, self)
+                    ask_for_mem = ask_for_mem + 2048;
+               else:
+                    # OK. We got enough DMA memory.
+                    break
+               retries  = retries - 1
+       except:
+           # This is best-try after all.
+           need_mem = max(1, need_mem);
+           pass
+
+       if (need_mem > 0):
+           log.warn('We tried our best to balloon down DMA memory to '
+                    'accomodate your PV guest. We need %dKiB extra memory.',
+                    need_mem)
 
     def _setSchedParams(self):
         if XendNode.instance().xenschedinfo() == 'credit':
@@ -2668,7 +2718,7 @@ class XendDomainInfo:
             # repin domain vcpus if a restricted cpus list is provided
             # this is done prior to memory allocation to aide in memory
             # distribution for NUMA systems.
-            self._setCPUAffinity()
+            node = self._setCPUAffinity()
 
             # Set scheduling parameters.
             self._setSchedParams()
@@ -2730,6 +2780,8 @@ class XendDomainInfo:
             if self.info.target():
                 self._setTarget(self.info.target())
 
+            self._freeDMAmemory(node)
+
             self._createDevices()
 
             self.image.cleanupTmpImages()
index 0fbefef6f8327335caaa53f10d451f05b8c716c0..bb1dad4eabbbc33d324a89b594826f64cbaa9a0a 100644 (file)
@@ -872,11 +872,11 @@ class XendNode:
         except:
             str='none\n'
         return str[:-1];
-    def format_node_to_memory(self, pinfo):
+    def format_node_to_memory(self, pinfo, key):
         str=''
         whitespace=''
         try:
-            node_to_memory=pinfo['node_to_memory']
+            node_to_memory=pinfo[key]
             for i in range(0, pinfo['nr_nodes']):
                 str+='%snode%d:%d\n' % (whitespace,
                                         i,
@@ -896,7 +896,10 @@ class XendNode:
         info['total_memory'] = info['total_memory'] / 1024
         info['free_memory']  = info['free_memory'] / 1024
         info['node_to_cpu']  = self.format_node_to_cpu(info)
-        info['node_to_memory'] = self.format_node_to_memory(info)
+        info['node_to_memory'] = self.format_node_to_memory(info,
+                                       'node_to_memory')
+        info['node_to_dma32_mem'] = self.format_node_to_memory(info,
+                                       'node_to_dma32_mem')
 
         ITEM_ORDER = ['nr_cpus',
                       'nr_nodes',
@@ -908,7 +911,8 @@ class XendNode:
                       'total_memory',
                       'free_memory',
                       'node_to_cpu',
-                      'node_to_memory'
+                      'node_to_memory',
+                      'node_to_dma32_mem'
                       ]
 
         return [[k, info[k]] for k in ITEM_ORDER]